# REPLICATION
# Place-Based Campaigning: The Political Impact of Real Grassroots Mobilization
# Daniel Bischof and Thomas Kurer
# Journal of Politics

# Extend main data
  # M5S activity in neighboring municipalities
  # ADSL coverage in 2018

# load data

# municipal boundaries (2016 shapefile), read and re-projected to lat/lon in one step
shape <- sf::st_transform(
  sf::st_read(
    "./../../data_original/Shapefile/Limiti_2016_ED50_g/Com2016_ED50_g/Com2016_ED50_g.shp",
    stringsAsFactors = FALSE
  ),
  "+proj=longlat +ellps=WGS84 +datum=WGS84"
)

# quick look at the imported object: class, structure, coordinate reference system
class(shape)
str(shape)
sf::st_crs(shape)

# (1) identify neighboring municipalities ----

# Municipality codes (PRO_COM) become the row names of `shape`. The steps below
# treat the row names of the neighbor matrix as municipality codes.
# NOTE(review): assumes spdep takes its region ids from row.names(shape) - confirm.
row.names(shape) <- as.character(shape$PRO_COM)

# spdep is called through its namespace (like sf above): no library(spdep)
# call appears in this script, so the bare function names are not guaranteed
# to be on the search path.
nb <- spdep::poly2nb(shape)
# binary (0/1) adjacency matrix; zero.policy = TRUE admits municipalities
# without any neighbor
mat <- spdep::nb2mat(nb, style = "B", zero.policy = TRUE)
colnames(mat) <- rownames(mat)
# console inspection only (output is truncated at getOption("max.print"))
mat


# one character vector of neighbor codes per municipality, in the row order
# of `mat`; empty for a municipality without neighbors
neighbor <- lapply(
  seq_len(nrow(mat)),
  function(i) names(which(mat[i, ] == 1))
)


library(plyr)
library(dplyr)
# one row per municipality, one column (V1, V2, ...) per neighbor; rows with
# fewer neighbors are padded with NA
df_nb <- plyr::rbind.fill(lapply(neighbor, function(x) as.data.frame(t(x))))
comune_id <- shape$PRO_COM
comune_nb <- cbind(comune_id, df_nb)
# apply() converts the data frame with as.matrix() first, so codes stored as
# character or factor are turned into numbers from their labels; the result is
# a numeric matrix
comune_nb <- apply(comune_nb, 2, as.numeric)
# no `sep` is given, so the file is space-separated despite its .csv name
write.table(comune_nb, "./../../data_coded/neighbors_by_comune-id.csv", row.names = FALSE, na=".")

# sum over neighbors

events_comune_full <- read.csv("./../../data_coded/events_comune.csv", stringsAsFactors = FALSE)

# kept because a later step of this script takes the municipality ids of the
# main data from this object
events_comune <- events_comune_full %>% dplyr::select(comune_id, n_treat_157d)

# Sum one event variable over the neighbors of every municipality.
#
# neighbors: matrix or data frame with a `comune_id` column and neighbor-id
#            columns named V1, V2, ... (NA where there are fewer neighbors);
#            any number of V columns is accepted
# events:    data frame with `comune_id` and the column named by `value_col`
# value_col: name of the event variable to sum
# out_col:   name given to the column of totals
#
# Returns a data frame with one row per row of `neighbors`, holding
# `comune_id` and the total in `out_col`. Neighbors without a record in
# `events` and NA values are skipped, so a municipality with no usable
# neighbor gets a total of 0.
#
# NOTE(review): match() uses the first record per id, i.e. this assumes that
# `comune_id` is unique in `events` - confirm.
sum_neighbor_events <- function(neighbors, events, value_col, out_col) {
  neighbors <- as.data.frame(neighbors)
  id_cols <- grep("^V[0-9]+$", names(neighbors), value = TRUE)
  neighbor_ids <- as.matrix(neighbors[, id_cols, drop = FALSE])

  # look up the value of every neighbor by id; ids without a record give NA
  values <- events[[value_col]][match(neighbor_ids, events$comune_id)]
  dim(values) <- dim(neighbor_ids)

  totals <- data.frame(neighbors$comune_id, rowSums(values, na.rm = TRUE))
  names(totals) <- c("comune_id", out_col)
  totals
}

# name of each neighbor total = the event variable it is computed from;
# the order fixes the column order in `final`
neigh_vars <- c(
  n_neigh_treat_157d      = "n_treat_157d",
  n_neigh_pre_157d        = "n_pre_157d",
  n_neigh_treat_campaign  = "n_treat_campaign",
  wn_neigh_treat_157d     = "wn_treat_157d",
  wn_neigh_pre_157d       = "wn_pre_157d",
  wn_neigh_treat_campaign = "wn_treat_campaign"
)

# merge with main data

# left joins: every row of the main data is kept; municipalities that are
# missing from the neighbor table get NA totals
final <- events_comune_full
for (out_col in names(neigh_vars)) {
  neigh_total <- sum_neighbor_events(
    comune_nb, events_comune_full, neigh_vars[[out_col]], out_col
  )
  final <- merge(final, neigh_total, by = "comune_id", all.x = TRUE)
}

# merge complete region codes

# plain table of the four code columns of the shapefile (no geometry, no row names)
code_cols <- c("COD_REG", "COD_CM", "COD_PRO", "PRO_COM")
codes <- as.data.frame(shape)[, code_cols]
rownames(codes) <- NULL

# inner join: rows of `final` without a matching PRO_COM are dropped
final <- merge(final, codes, by.x = "comune_id", by.y = "PRO_COM")

# (2) add ADSL info ----
# collected from: http://bandaultralarga.italia.it/mappa-bul/regione [Sept 3, 2019]


banda <- read.csv("./../../data_original/M5S/2_nga.csv", stringsAsFactors = FALSE)

# some preparation for clean merge

# lookup table: municipality id and its name transliterated to ASCII, lower case
library(stringi)
namematch <- data.frame(
  comune_id = final$comune_id,
  comune_name = tolower(stri_trans_general(final$comune_name.y, "Latin-ASCII")),
  stringsAsFactors = FALSE
)

# attach the id found by name to every coverage record (all records are kept)
banda2 <- merge(banda, namematch, by.x = "com", by.y = "comune_name", all.x = TRUE)

# 1 = the record's own code (nrcom) does not occur among the ids of the main data
banda2$nonmatch <- as.numeric(!(banda2$nrcom %in% final$comune_id))

# flagged records with nrreg 20 and a name match take the id found by name,
# all others keep nrcom
# NOTE(review): nrreg 20 is presumably Sardegna - confirm
use_name_id <- banda2$nonmatch == 1 & banda2$nrreg == 20 & !is.na(banda2$comune_id)
banda2$comune_id_final <- ifelse(use_name_id, banda2$comune_id, banda2$nrcom)

bandalarga <- data.frame(
  comune_name_simp = banda2$com,
  comune_id = banda2$comune_id_final,
  nga_coverage = banda2$coverage,
  stringsAsFactors = FALSE
)
# indicator: 1 if coverage is above zero, 0 otherwise
bandalarga$nga_dummy <- ifelse(bandalarga$nga_coverage > 0, 1, 0)


# keep the first record per municipality id
bandalarga <- bandalarga[!duplicated(bandalarga$comune_id), ]
# merge internet

finalids <- events_comune$comune_id

final <- merge(final, bandalarga, by = "comune_id", all.x = TRUE)

# fix issues with all.x merge

# keep only ids of the main data, and only the first row per id
final <- final %>%
  dplyr::filter(comune_id %in% finalids, !duplicated(comune_id))

# pick and order the output columns; the name and population columns are
# renamed while selecting
final <- final %>%
  dplyr::select(comune_id,
                comune_name = comune_name.y,
                comune_name_simp,
                COD_REG, COD_CM, COD_PRO,
                comune_population = pop,
                region_id, terunit_id, province_id,
                group_id, first_event,
                starts_with("n_"),
                starts_with("wn_"),
                starts_with("hist_"),
                n_neigh_treat_157d, n_neigh_pre_157d,
                nga_coverage, nga_dummy)

# (3) write extended data ----
write.table(final, "./../../data_coded/events_comune_neigh_nga.csv", sep=",", na=".", row.names = FALSE)
